Validating Bias-Correction of CMIP6 Data

Bias corrected timelines for the different bias corrected variables will be compared against the reference observations (OISSTv2, SODA) to check how accurately they represent observed conditions, and how that changes across areas of interest.

Load Data

Target Areas

# Getting Timeseries and Shapefile Locations via gmRi
# returns path to oisst timeseries & path to shapefile:
region_groups <- c("nelme_regions", "gmri_sst_focal_areas", "lme", "nmfs_trawl_regions")
region_lookup <- map(region_groups, function(region_group){
    mask_details <- get_timeseries_paths(region_group) }) %>% 
  setNames(region_groups)

# Grab a couple to use from each as contenders

# # NELME_regions
# nelme_gom   <- read_sf(region_lookup$nelme_regions$GoM$shape_path)
# nelme_mab   <- read_sf(region_lookup$nelme_regions$SNEandMAB$shape_path) 
# nelme_nelme <- read_sf(region_lookup$nelme_regions$NELME$shape_path) 
# 
# # These have exaggerated detail and take forever to plot
# nelme_gom   <- st_simplify(nelme_gom)
# nelme_mab   <- st_simplify(nelme_mab)
# nelme_nelme <- st_simplify(nelme_nelme)
# 
# # SST Focal Areas
# gmri_nwa <- read_sf(region_lookup$gmri_sst_focal_areas$aak_northwest_atlantic$shape_path)
# gmri_gom <- read_sf(region_lookup$gmri_sst_focal_areas$apershing_gulf_of_maine$shape_path)
# gmri_cpr <- read_sf(region_lookup$gmri_sst_focal_areas$cpr_gulf_of_maine$shape_path)
# 
# # Large Marine Ecosystem
# lme_neus <- read_sf(region_lookup$lme$northeast_us_continental_shelf$shape_path)

# NMFS Trawl Regions
trawl_gb  <- read_sf(region_lookup$nmfs_trawl_regions$georges_bank$shape_path) 
trawl_gom <- read_sf(region_lookup$nmfs_trawl_regions$gulf_of_maine$shape_path)
trawl_mab <- read_sf(region_lookup$nmfs_trawl_regions$mid_atlantic_bight$shape_path)
trawl_sne <- read_sf(region_lookup$nmfs_trawl_regions$southern_new_england$shape_path)

# # Rebuild entire trawl area - internal structures removed
# trawl_full <- st_union(trawl_gb)  %>% 
#   st_union(st_union(trawl_gom)) %>% 
#   st_union(st_union(trawl_mab)) %>% 
#   st_union(st_union(trawl_sne)) %>% 
#   st_union()

# Load all the strata and just filter out the crap ones
trawl_full <- read_sf(str_c(res_path, "Shapefiles/BottomTrawlStrata/BTS_Strata.shp"))  %>% 
  clean_names() %>% 
  filter(strata >= 01010 ,
         strata <= 01760,
         strata != 1310,
         strata != 1320,
         strata != 1330,
         strata != 1350,
         strata != 1410,
         strata != 1420,
         strata != 1490) 



# DFO Data
dfo_path <- shared.path(group = "Mills Lab", folder = "Projects/DFO_survey_data/strata_shapefiles")
dfo_area <- read_sf(str_c(dfo_path, "MaritimesRegionEcosystemAssessmentBoundary.shp"))


# set overall zoom for maps
xlimz <- c(-76, -57)
ylimz <- c(35, 48)

# base map
base_map <- ggplot() +
  geom_sf(data = new_england) +
  geom_sf(data = canada) +
  geom_sf(data = greenland) +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(color = "", 
       fill = "")

NMFS Trawl Areas

# Build off of base map
base_map +
  # geom_sf(data = trawl_full, aes(fill = "Trawl Survey Area")) +
  geom_sf(data = st_union(trawl_full), aes(fill = "Trawl Survey Area"), alpha = 0.2) +
  geom_sf(data = st_union(trawl_gom), aes(fill = "Gulf of Maine Survey Area"), alpha = 0.2) +
  coord_sf(xlim = xlimz, ylim = ylimz) +
  labs(title = "NMFS Trawl Region")

DFO Survey Areas

# Map dfo data 
base_map + 
  geom_sf(data = dfo_area, aes(fill = "DFO Ecosystems Assesment Boundary"), alpha = 0.2) +
  coord_sf(xlim = xlimz, ylim = ylimz) +
  labs(title = "DFO Region")

All Areas

base_map +
  geom_sf(data = st_union(trawl_full), aes(fill = "Trawl Survey Area"), alpha = 0.2) +
  geom_sf(data = st_union(trawl_gom), aes(fill = "Gulf of Maine Survey Area"), alpha = 0.2) +
  geom_sf(data = dfo_area, aes(fill = "DFO Ecosystems Assesment Boundary"), alpha = 0.2) +
  coord_sf(xlim = xlimz, ylim = ylimz) +
  labs(title = "All Areas")

SODA

# SODA vars
soda_ssal  <- stack(str_c(soda_path, "SODA_Salt_Red.nc"))
soda_bsal  <- stack(str_c(soda_path, "SODA_Salt_Red_bottomLayer.nc"))
soda_btemp <- stack(str_c(soda_path, "SODA_Temp_Red_bottomLayer.nc"))

OISSTv2

# OISST 
data_window <- data.frame(lon = c(-72, -61),
                          lat = c(39, 46.1),
                          time = as.Date(c("1981-09-01", "2020-12-31")))

# Load Data
oisst <- oisst_window_load(oisst_path = oisst_path, 
                           data_window = data_window, 
                           anomalies = FALSE)


# Convert to Monthly
oisst <- stack(oisst)

# build a key for months using the layer names
month_key <- seq.Date(min(data_window$time), max(data_window$time), by = "month")
month_key <- str_c("X", month_key) %>% str_replace_all("-", ".")
month_key <- str_sub(month_key, 1, -4)

# 
oisst_monthly <- map(month_key, function(wut_month){
  
  # Identify corresponding days in month
  which_layers  <- str_detect(names(oisst), wut_month)
  layer_indices <- which(which_layers)
  
  if(length(layer_indices) == 0) { print(str_c(length(layer_indices), " days for ", wut_month))  }
  
  # Get mean
  month_mean <- calc(oisst[[layer_indices]], fun = mean, na.rm = T)

  # set month as name
  month_mean <- setNames(month_mean, wut_month)
  return(month_mean)
  
  
}) 

# Crop them
oisst_monthly <- stack(oisst_monthly)

# clean up
rm(oisst)

Cropping Functions

####  Processing Functions
# Masking function for an area
mask_shape <- function(in_ras, in_mask){
  r1 <- crop(x = in_ras, y = in_mask)
  r2 <- mask(x = r1, mask = in_mask)
  return(r2)}

# Function to turn it into a dataframe using cellstats
timeseries_from_mask <- function(ras_in, var_name){
  ts <- cellStats(ras_in, mean, na.rm = T)
  ts <- ts %>% 
    as.data.frame() %>% 
    rownames_to_column() %>% 
    setNames(c("date", var_name)) %>% 
    mutate(date = str_remove(date, "X"),
           date = str_replace_all(date, "[.]", "-"))
  
  return(ts)
}

Processing Observed Variables

# Put observed variables in a list
observed_vars <- list(
  bot_temp  = soda_btemp,
  bot_sal   = soda_bsal,
  surf_sal  = soda_ssal,
  surf_temp = oisst_monthly)

# Now mask them all and get timeseries
masked_vars <- imap(observed_vars, function(masking_var, var_name){
  
  # Mask and get timeseries
  masked_gom   <- mask_shape(masking_var, trawl_gom)
  masked_gom   <- timeseries_from_mask(masked_gom, var_name)
  masked_trawl <- mask_shape(masking_var, trawl_full)
  masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
  masked_dfo   <- mask_shape(masking_var, dfo_area)
  masked_dfo   <- timeseries_from_mask(masked_dfo, var_name)
    
  # Put in list
  list(
    nefsc_gom  = masked_gom,
    nefsc_full = masked_trawl,
    dfo_full   = masked_dfo)
  
})


# Get Time Periods to SODA/OISST
soda_years  <- unique(str_sub(names(soda_ssal), 2, 5))
oisst_years <- unique(str_sub(names(oisst_monthly), 2, 5))


# clean up
rm(soda_bsal, soda_btemp, soda_ssal, oisst_monthly)

CMIP6 Bias-Corrected

Mean

# CMIP Bias Corrected
cmip_stemp <- stack(str_c(cmip_path, "BiasCorrected/surf_temp_OISST_bias_corrected_mean.grd"))
cmip_btemp <- stack(str_c(cmip_path, "BiasCorrected/bot_temp_SODA_bias_corrected_mean.grd"))
cmip_ssal  <- stack(str_c(cmip_path, "BiasCorrected/surf_sal_SODA_bias_corrected_mean.grd"))
cmip_bsal  <- stack(str_c(cmip_path, "BiasCorrected/bot_sal_SODA_bias_corrected_mean.grd"))


# Cmip matches to SODA/OISST Time Periods
soda_index  <- which(str_sub(names(cmip_bsal), 2, 5) %in% soda_years)
oisst_index <- which(str_sub(names(cmip_stemp), 2, 5) %in% oisst_years)


# Put them in a list
cmip_vars <- list(
  bot_temp  = cmip_btemp[[soda_index]],
  bot_sal   = cmip_bsal[[soda_index]],
  surf_sal  = cmip_ssal[[soda_index]],
  surf_temp = cmip_stemp[[oisst_index]])

# Rotate them:
cmip_vars <- map(cmip_vars, raster::rotate)



# Now mask them all and get timeseries
masked_cmip <- imap(cmip_vars, function(masking_var, var_name){
  
  # Mask and get timeseries
  masked_gom   <- mask_shape(masking_var, trawl_gom)
  masked_gom   <- timeseries_from_mask(masked_gom, var_name)
  masked_trawl <- mask_shape(masking_var, trawl_full)
  masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
  masked_dfo   <- mask_shape(masking_var, dfo_area)
  masked_dfo   <- timeseries_from_mask(masked_dfo, var_name)
    
  # put in list
  list(
    nefsc_gom  = masked_gom,
    nefsc_full = masked_trawl,
    dfo_full   = masked_dfo
  )
  
})

5th Percentile

# CMIP Bias Corrected
cmip_stemp_5 <- stack(str_c(cmip_path, "BiasCorrected/surf_temp_OISST_bias_corrected_5thpercentile.grd"))
cmip_btemp_5 <- stack(str_c(cmip_path, "BiasCorrected/bot_temp_SODA_bias_corrected_5thpercentile.grd"))
cmip_ssal_5  <- stack(str_c(cmip_path, "BiasCorrected/surf_sal_SODA_bias_corrected_5thpercentile.grd"))
cmip_bsal_5  <- stack(str_c(cmip_path, "BiasCorrected/bot_sal_SODA_bias_corrected_5thpercentile.grd"))


# Put them in a list
cmip_vars_5 <- list(
  bot_temp  = cmip_btemp_5[[soda_index]],
  bot_sal   = cmip_bsal_5[[soda_index]],
  surf_sal  = cmip_ssal_5[[soda_index]],
  surf_temp = cmip_stemp_5[[oisst_index]])

# Rotate them:
cmip_vars_5 <- map(cmip_vars_5, raster::rotate)

# Now mask them all and get timeseries
masked_cmip_5 <- imap(cmip_vars_5, function(masking_var, var_name){
  
  # Mask and get timeseries
  masked_gom   <- mask_shape(masking_var, trawl_gom)
  masked_gom   <- timeseries_from_mask(masked_gom, var_name)
  masked_trawl <- mask_shape(masking_var, trawl_full)
  masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
  masked_dfo   <- mask_shape(masking_var, dfo_area)
  masked_dfo   <- timeseries_from_mask(masked_dfo, var_name)
    
  # put in list
  list(
    nefsc_gom  = masked_gom,
    nefsc_full = masked_trawl,
    dfo_full   = masked_dfo
  )
  
})

95th Percentile

# CMIP Bias Corrected
cmip_stemp_95 <- stack(str_c(cmip_path, "BiasCorrected/surf_temp_OISST_bias_corrected_95thpercentile.grd"))
cmip_btemp_95 <- stack(str_c(cmip_path, "BiasCorrected/bot_temp_SODA_bias_corrected_95thpercentile.grd"))
cmip_ssal_95  <- stack(str_c(cmip_path, "BiasCorrected/surf_sal_SODA_bias_corrected_95thpercentile.grd"))
cmip_bsal_95  <- stack(str_c(cmip_path, "BiasCorrected/bot_sal_SODA_bias_corrected_95thpercentile.grd"))


# Put them in a list
cmip_vars_95 <- list(
  bot_temp  = cmip_btemp_95[[soda_index]],
  bot_sal   = cmip_bsal_95[[soda_index]],
  surf_sal  = cmip_ssal_95[[soda_index]],
  surf_temp = cmip_stemp_95[[oisst_index]])

# Rotate them:
cmip_vars_95 <- map(cmip_vars_95, raster::rotate)


# Now mask them all and get timeseries
masked_cmip_95 <- imap(cmip_vars_95, function(masking_var, var_name){
  
  # Mask and get timeseries
  masked_gom   <- mask_shape(masking_var, trawl_gom)
  masked_gom   <- timeseries_from_mask(masked_gom, var_name)
  masked_trawl <- mask_shape(masking_var, trawl_full)
  masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
  masked_dfo   <- mask_shape(masking_var, dfo_area)
  masked_dfo   <- timeseries_from_mask(masked_dfo, var_name)
    
  # put in list
  list(
    nefsc_gom  = masked_gom,
    nefsc_full = masked_trawl,
    dfo_full   = masked_dfo
  )
  
})

Visual Comparisons

comparison_plot <- function(var_option, mask_option){
  
  obs_data  <- masked_vars[[var_option]][[mask_option]]
  cmip_mean <- masked_cmip[[var_option]][[mask_option]]
  cmip_5    <- masked_cmip_5[[var_option]][[mask_option]]
  cmip_95   <- masked_cmip_95[[var_option]][[mask_option]]
  
  # Add data source
  cmip_mean$data_source <- "CMIP Mean"
  cmip_5$data_source    <- "CMIP 5th Perc."
  cmip_95$data_source   <- "CMIP 95th Perc."
  
  # Format dates  
  cmip_mean$date <- as.Date(str_c(cmip_mean$date, "-15"))
  cmip_5$date    <- as.Date(str_c(cmip_5$date, "-15"))
  cmip_95$date   <- as.Date(str_c(cmip_95$date, "-15"))

  # Separate action for oisst/soda
  if(var_option == "surf_temp"){
    obs_data$date <- as.Date(str_c(obs_data$date, "-15"))
    obs_data$data_source <- "OISSTv2"

    } else{
    obs_data$date <- as.Date(obs_data$date)
    obs_data$data_source <- "SODA"
  }
  
  # format variable for display on axis
  var_label <- str_replace(var_option, "_", " ")
  var_label <- str_to_title(var_label)
  
  ggplot() +
     geom_line(data = obs_data, aes_string("date", var_option, color = "data_source")) +
     geom_line(data = cmip_mean, aes_string("date", var_option, color = "data_source")) +
     geom_line(data = cmip_5, aes_string("date", var_option, color = "data_source")) +
     geom_line(data = cmip_95, aes_string("date", var_option, color = "data_source")) +
     scale_color_gmri() +
     labs(x = "", y = var_label, color = "") +
     facet_zoom(xlim =c(as.Date("2000-01-01"), as.Date("2002-12-31")), zoom.size = .5) +
    theme(legend.position = "bottom")
  
}

Bottom Temperature

var_option <- "bot_temp"

Gulf of Maine

mask_option <- "nefsc_gom"

comparison_plot(var_option = var_option, mask_option = mask_option)

NMFS Survey Area

mask_option <- "nefsc_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

DFO Survey Area

mask_option <- "dfo_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

Surface Temperature

var_option = "surf_temp"

Gulf of Maine

mask_option <- "nefsc_gom"

comparison_plot(var_option = var_option, mask_option = mask_option)

NMFS Survey Area

mask_option <- "nefsc_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

DFO Survey Area

mask_option <- "dfo_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

Bottom Salinity

var_option <- "bot_sal"

Gulf of Maine

mask_option <- "nefsc_gom"

comparison_plot(var_option = var_option, mask_option = mask_option)

NMFS Survey Area

mask_option <- "nefsc_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

DFO Survey Area

mask_option <- "dfo_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

Surface Salinity

var_option <- "surf_sal"

Gulf of Maine

mask_option <- "nefsc_gom"

comparison_plot(var_option = var_option, mask_option = mask_option)

NMFS Survey Area

mask_option <- "nefsc_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

DFO Survey Area

mask_option <- "dfo_full"

comparison_plot(var_option = var_option, mask_option = mask_option)